Title 1

Title 2

Load Packages

library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag():    dplyr, stats
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
source("functions.R")

I downloaded the gapminder data file.

# Fetch the gapminder CSV into data/. download.file() does not create missing
# directories, so make sure the destination folder exists first.
dir.create("data", showWarnings = FALSE)
download.file(
  "https://raw.githubusercontent.com/swcarpentry/r-novice-gapminder/gh-pages/_episodes_rmd/data/gapminder-FiveYearData.csv",
  destfile = "data/gapminder-FiveYearData.csv"
)

# Load the downloaded file and preview its first rows.
gapminder <- read.csv("data/gapminder-FiveYearData.csv")
head(gapminder)
##       country year      pop continent lifeExp gdpPercap
## 1 Afghanistan 1952  8425333      Asia  28.801  779.4453
## 2 Afghanistan 1957  9240934      Asia  30.332  820.8530
## 3 Afghanistan 1962 10267083      Asia  31.997  853.1007
## 4 Afghanistan 1967 11537966      Asia  34.020  836.1971
## 5 Afghanistan 1972 13079460      Asia  36.088  739.9811
## 6 Afghanistan 1977 14880372      Asia  38.438  786.1134

I wonder how life expectancy has changed over the years.

# Scatter plot of life expectancy against year, one point per row of gapminder.
p <- ggplot(gapminder, mapping = aes(x = year, y = lifeExp)) +
  geom_point()

# Printing the object renders the plot.
p

Let's see it as an interactive plot.

# Hand the ggplot object to plotly's ggplotly() converter.
p %>%
  ggplotly()
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`

Making your own functions

If you are repeating yourself in your code, you may be able to solve that problem by making your own function!

# Small example vector for the se() helper (presumably defined in functions.R).
# NOTE(review): this name shadows R's built-in `cars` dataset; confirm that is intended.
cars <- c(3:7, 10)
se(cars)
## [1] 1.013794

Data manipulation with dplyr

You will likely want to get subsections of your data frame and/or calculate means of a variable for a certain subsection; dplyr is your friend!

# Read the same CSV that was loaded earlier.
gapminder <- read.csv(file = "data/gapminder-FiveYearData.csv")

# Two ways to end up with only the year, country and gdpPercap columns:
# 1. list the columns to keep (they come back in the order listed) ...
year_country_gdp <- gapminder %>%
  select(year, country, gdpPercap)
# 2. ... or list the columns to drop (the remaining ones keep their original
#    order). This second assignment replaces the result of the first.
year_country_gdp <- gapminder %>%
  select(-pop, -continent, -lifeExp)

names(year_country_gdp)
## [1] "country"   "year"      "gdpPercap"

Explore Filter

# Step-by-step version: keep the filtered rows in an intermediate data frame,
# then pick the columns from it.
euro <- filter(gapminder, continent == "Europe")
year_country_gdp_euro <- select(euro, year, country, gdpPercap)

# Piped version: the same filter-then-select, chained with %>% so that no
# intermediate variable is needed.
year_country_gdp_euro <- gapminder %>%
  filter(continent == "Europe") %>%
  select(year, country, gdpPercap)

# Print the whole result.
year_country_gdp_euro
##     year                country  gdpPercap
## 1   1952                Albania  1601.0561
## 2   1957                Albania  1942.2842
## 3   1962                Albania  2312.8890
## 4   1967                Albania  2760.1969
## 5   1972                Albania  3313.4222
## 6   1977                Albania  3533.0039
## 7   1982                Albania  3630.8807
## 8   1987                Albania  3738.9327
## 9   1992                Albania  2497.4379
## 10  1997                Albania  3193.0546
## 11  2002                Albania  4604.2117
## 12  2007                Albania  5937.0295
## 13  1952                Austria  6137.0765
## 14  1957                Austria  8842.5980
## 15  1962                Austria 10750.7211
## 16  1967                Austria 12834.6024
## 17  1972                Austria 16661.6256
## 18  1977                Austria 19749.4223
## 19  1982                Austria 21597.0836
## 20  1987                Austria 23687.8261
## 21  1992                Austria 27042.0187
## 22  1997                Austria 29095.9207
## 23  2002                Austria 32417.6077
## 24  2007                Austria 36126.4927
## 25  1952                Belgium  8343.1051
## 26  1957                Belgium  9714.9606
## 27  1962                Belgium 10991.2068
## 28  1967                Belgium 13149.0412
## 29  1972                Belgium 16672.1436
## 30  1977                Belgium 19117.9745
## 31  1982                Belgium 20979.8459
## 32  1987                Belgium 22525.5631
## 33  1992                Belgium 25575.5707
## 34  1997                Belgium 27561.1966
## 35  2002                Belgium 30485.8838
## 36  2007                Belgium 33692.6051
## 37  1952 Bosnia and Herzegovina   973.5332
## 38  1957 Bosnia and Herzegovina  1353.9892
## 39  1962 Bosnia and Herzegovina  1709.6837
## 40  1967 Bosnia and Herzegovina  2172.3524
## 41  1972 Bosnia and Herzegovina  2860.1698
## 42  1977 Bosnia and Herzegovina  3528.4813
## 43  1982 Bosnia and Herzegovina  4126.6132
## 44  1987 Bosnia and Herzegovina  4314.1148
## 45  1992 Bosnia and Herzegovina  2546.7814
## 46  1997 Bosnia and Herzegovina  4766.3559
## 47  2002 Bosnia and Herzegovina  6018.9752
## 48  2007 Bosnia and Herzegovina  7446.2988
## 49  1952               Bulgaria  2444.2866
## 50  1957               Bulgaria  3008.6707
## 51  1962               Bulgaria  4254.3378
## 52  1967               Bulgaria  5577.0028
## 53  1972               Bulgaria  6597.4944
## 54  1977               Bulgaria  7612.2404
## 55  1982               Bulgaria  8224.1916
## 56  1987               Bulgaria  8239.8548
## 57  1992               Bulgaria  6302.6234
## 58  1997               Bulgaria  5970.3888
## 59  2002               Bulgaria  7696.7777
## 60  2007               Bulgaria 10680.7928
## 61  1952                Croatia  3119.2365
## 62  1957                Croatia  4338.2316
## 63  1962                Croatia  5477.8900
## 64  1967                Croatia  6960.2979
## 65  1972                Croatia  9164.0901
## 66  1977                Croatia 11305.3852
## 67  1982                Croatia 13221.8218
## 68  1987                Croatia 13822.5839
## 69  1992                Croatia  8447.7949
## 70  1997                Croatia  9875.6045
## 71  2002                Croatia 11628.3890
## 72  2007                Croatia 14619.2227
## 73  1952         Czech Republic  6876.1403
## 74  1957         Czech Republic  8256.3439
## 75  1962         Czech Republic 10136.8671
## 76  1967         Czech Republic 11399.4449
## 77  1972         Czech Republic 13108.4536
## 78  1977         Czech Republic 14800.1606
## 79  1982         Czech Republic 15377.2285
## 80  1987         Czech Republic 16310.4434
## 81  1992         Czech Republic 14297.0212
## 82  1997         Czech Republic 16048.5142
## 83  2002         Czech Republic 17596.2102
## 84  2007         Czech Republic 22833.3085
## 85  1952                Denmark  9692.3852
## 86  1957                Denmark 11099.6593
## 87  1962                Denmark 13583.3135
## 88  1967                Denmark 15937.2112
## 89  1972                Denmark 18866.2072
## 90  1977                Denmark 20422.9015
## 91  1982                Denmark 21688.0405
## 92  1987                Denmark 25116.1758
## 93  1992                Denmark 26406.7399
## 94  1997                Denmark 29804.3457
## 95  2002                Denmark 32166.5001
## 96  2007                Denmark 35278.4187
## 97  1952                Finland  6424.5191
## 98  1957                Finland  7545.4154
## 99  1962                Finland  9371.8426
## 100 1967                Finland 10921.6363
## 101 1972                Finland 14358.8759
## 102 1977                Finland 15605.4228
## 103 1982                Finland 18533.1576
## 104 1987                Finland 21141.0122
## 105 1992                Finland 20647.1650
## 106 1997                Finland 23723.9502
## 107 2002                Finland 28204.5906
## 108 2007                Finland 33207.0844
## 109 1952                 France  7029.8093
## 110 1957                 France  8662.8349
## 111 1962                 France 10560.4855
## 112 1967                 France 12999.9177
## 113 1972                 France 16107.1917
## 114 1977                 France 18292.6351
## 115 1982                 France 20293.8975
## 116 1987                 France 22066.4421
## 117 1992                 France 24703.7961
## 118 1997                 France 25889.7849
## 119 2002                 France 28926.0323
## 120 2007                 France 30470.0167
## 121 1952                Germany  7144.1144
## 122 1957                Germany 10187.8267
## 123 1962                Germany 12902.4629
## 124 1967                Germany 14745.6256
## 125 1972                Germany 18016.1803
## 126 1977                Germany 20512.9212
## 127 1982                Germany 22031.5327
## 128 1987                Germany 24639.1857
## 129 1992                Germany 26505.3032
## 130 1997                Germany 27788.8842
## 131 2002                Germany 30035.8020
## 132 2007                Germany 32170.3744
## 133 1952                 Greece  3530.6901
## 134 1957                 Greece  4916.2999
## 135 1962                 Greece  6017.1907
## 136 1967                 Greece  8513.0970
## 137 1972                 Greece 12724.8296
## 138 1977                 Greece 14195.5243
## 139 1982                 Greece 15268.4209
## 140 1987                 Greece 16120.5284
## 141 1992                 Greece 17541.4963
## 142 1997                 Greece 18747.6981
## 143 2002                 Greece 22514.2548
## 144 2007                 Greece 27538.4119
## 145 1952                Hungary  5263.6738
## 146 1957                Hungary  6040.1800
## 147 1962                Hungary  7550.3599
## 148 1967                Hungary  9326.6447
## 149 1972                Hungary 10168.6561
## 150 1977                Hungary 11674.8374
## 151 1982                Hungary 12545.9907
## 152 1987                Hungary 12986.4800
## 153 1992                Hungary 10535.6285
## 154 1997                Hungary 11712.7768
## 155 2002                Hungary 14843.9356
## 156 2007                Hungary 18008.9444
## 157 1952                Iceland  7267.6884
## 158 1957                Iceland  9244.0014
## 159 1962                Iceland 10350.1591
## 160 1967                Iceland 13319.8957
## 161 1972                Iceland 15798.0636
## 162 1977                Iceland 19654.9625
## 163 1982                Iceland 23269.6075
## 164 1987                Iceland 26923.2063
## 165 1992                Iceland 25144.3920
## 166 1997                Iceland 28061.0997
## 167 2002                Iceland 31163.2020
## 168 2007                Iceland 36180.7892
## 169 1952                Ireland  5210.2803
## 170 1957                Ireland  5599.0779
## 171 1962                Ireland  6631.5973
## 172 1967                Ireland  7655.5690
## 173 1972                Ireland  9530.7729
## 174 1977                Ireland 11150.9811
## 175 1982                Ireland 12618.3214
## 176 1987                Ireland 13872.8665
## 177 1992                Ireland 17558.8155
## 178 1997                Ireland 24521.9471
## 179 2002                Ireland 34077.0494
## 180 2007                Ireland 40675.9964
## 181 1952                  Italy  4931.4042
## 182 1957                  Italy  6248.6562
## 183 1962                  Italy  8243.5823
## 184 1967                  Italy 10022.4013
## 185 1972                  Italy 12269.2738
## 186 1977                  Italy 14255.9847
## 187 1982                  Italy 16537.4835
## 188 1987                  Italy 19207.2348
## 189 1992                  Italy 22013.6449
## 190 1997                  Italy 24675.0245
## 191 2002                  Italy 27968.0982
## 192 2007                  Italy 28569.7197
## 193 1952             Montenegro  2647.5856
## 194 1957             Montenegro  3682.2599
## 195 1962             Montenegro  4649.5938
## 196 1967             Montenegro  5907.8509
## 197 1972             Montenegro  7778.4140
## 198 1977             Montenegro  9595.9299
## 199 1982             Montenegro 11222.5876
## 200 1987             Montenegro 11732.5102
## 201 1992             Montenegro  7003.3390
## 202 1997             Montenegro  6465.6133
## 203 2002             Montenegro  6557.1943
## 204 2007             Montenegro  9253.8961
## 205 1952            Netherlands  8941.5719
## 206 1957            Netherlands 11276.1934
## 207 1962            Netherlands 12790.8496
## 208 1967            Netherlands 15363.2514
## 209 1972            Netherlands 18794.7457
## 210 1977            Netherlands 21209.0592
## 211 1982            Netherlands 21399.4605
## 212 1987            Netherlands 23651.3236
## 213 1992            Netherlands 26790.9496
## 214 1997            Netherlands 30246.1306
## 215 2002            Netherlands 33724.7578
## 216 2007            Netherlands 36797.9333
## 217 1952                 Norway 10095.4217
## 218 1957                 Norway 11653.9730
## 219 1962                 Norway 13450.4015
## 220 1967                 Norway 16361.8765
## 221 1972                 Norway 18965.0555
## 222 1977                 Norway 23311.3494
## 223 1982                 Norway 26298.6353
## 224 1987                 Norway 31540.9748
## 225 1992                 Norway 33965.6611
## 226 1997                 Norway 41283.1643
## 227 2002                 Norway 44683.9753
## 228 2007                 Norway 49357.1902
## 229 1952                 Poland  4029.3297
## 230 1957                 Poland  4734.2530
## 231 1962                 Poland  5338.7521
## 232 1967                 Poland  6557.1528
## 233 1972                 Poland  8006.5070
## 234 1977                 Poland  9508.1415
## 235 1982                 Poland  8451.5310
## 236 1987                 Poland  9082.3512
## 237 1992                 Poland  7738.8812
## 238 1997                 Poland 10159.5837
## 239 2002                 Poland 12002.2391
## 240 2007                 Poland 15389.9247
## 241 1952               Portugal  3068.3199
## 242 1957               Portugal  3774.5717
## 243 1962               Portugal  4727.9549
## 244 1967               Portugal  6361.5180
## 245 1972               Portugal  9022.2474
## 246 1977               Portugal 10172.4857
## 247 1982               Portugal 11753.8429
## 248 1987               Portugal 13039.3088
## 249 1992               Portugal 16207.2666
## 250 1997               Portugal 17641.0316
## 251 2002               Portugal 19970.9079
## 252 2007               Portugal 20509.6478
## 253 1952                Romania  3144.6132
## 254 1957                Romania  3943.3702
## 255 1962                Romania  4734.9976
## 256 1967                Romania  6470.8665
## 257 1972                Romania  8011.4144
## 258 1977                Romania  9356.3972
## 259 1982                Romania  9605.3141
## 260 1987                Romania  9696.2733
## 261 1992                Romania  6598.4099
## 262 1997                Romania  7346.5476
## 263 2002                Romania  7885.3601
## 264 2007                Romania 10808.4756
## 265 1952                 Serbia  3581.4594
## 266 1957                 Serbia  4981.0909
## 267 1962                 Serbia  6289.6292
## 268 1967                 Serbia  7991.7071
## 269 1972                 Serbia 10522.0675
## 270 1977                 Serbia 12980.6696
## 271 1982                 Serbia 15181.0927
## 272 1987                 Serbia 15870.8785
## 273 1992                 Serbia  9325.0682
## 274 1997                 Serbia  7914.3203
## 275 2002                 Serbia  7236.0753
## 276 2007                 Serbia  9786.5347
## 277 1952        Slovak Republic  5074.6591
## 278 1957        Slovak Republic  6093.2630
## 279 1962        Slovak Republic  7481.1076
## 280 1967        Slovak Republic  8412.9024
## 281 1972        Slovak Republic  9674.1676
## 282 1977        Slovak Republic 10922.6640
## 283 1982        Slovak Republic 11348.5459
## 284 1987        Slovak Republic 12037.2676
## 285 1992        Slovak Republic  9498.4677
## 286 1997        Slovak Republic 12126.2306
## 287 2002        Slovak Republic 13638.7784
## 288 2007        Slovak Republic 18678.3144
## 289 1952               Slovenia  4215.0417
## 290 1957               Slovenia  5862.2766
## 291 1962               Slovenia  7402.3034
## 292 1967               Slovenia  9405.4894
## 293 1972               Slovenia 12383.4862
## 294 1977               Slovenia 15277.0302
## 295 1982               Slovenia 17866.7218
## 296 1987               Slovenia 18678.5349
## 297 1992               Slovenia 14214.7168
## 298 1997               Slovenia 17161.1073
## 299 2002               Slovenia 20660.0194
## 300 2007               Slovenia 25768.2576
## 301 1952                  Spain  3834.0347
## 302 1957                  Spain  4564.8024
## 303 1962                  Spain  5693.8439
## 304 1967                  Spain  7993.5123
## 305 1972                  Spain 10638.7513
## 306 1977                  Spain 13236.9212
## 307 1982                  Spain 13926.1700
## 308 1987                  Spain 15764.9831
## 309 1992                  Spain 18603.0645
## 310 1997                  Spain 20445.2990
## 311 2002                  Spain 24835.4717
## 312 2007                  Spain 28821.0637
## 313 1952                 Sweden  8527.8447
## 314 1957                 Sweden  9911.8782
## 315 1962                 Sweden 12329.4419
## 316 1967                 Sweden 15258.2970
## 317 1972                 Sweden 17832.0246
## 318 1977                 Sweden 18855.7252
## 319 1982                 Sweden 20667.3812
## 320 1987                 Sweden 23586.9293
## 321 1992                 Sweden 23880.0168
## 322 1997                 Sweden 25266.5950
## 323 2002                 Sweden 29341.6309
## 324 2007                 Sweden 33859.7484
## 325 1952            Switzerland 14734.2327
## 326 1957            Switzerland 17909.4897
## 327 1962            Switzerland 20431.0927
## 328 1967            Switzerland 22966.1443
## 329 1972            Switzerland 27195.1130
## 330 1977            Switzerland 26982.2905
## 331 1982            Switzerland 28397.7151
## 332 1987            Switzerland 30281.7046
## 333 1992            Switzerland 31871.5303
## 334 1997            Switzerland 32135.3230
## 335 2002            Switzerland 34480.9577
## 336 2007            Switzerland 37506.4191
## 337 1952                 Turkey  1969.1010
## 338 1957                 Turkey  2218.7543
## 339 1962                 Turkey  2322.8699
## 340 1967                 Turkey  2826.3564
## 341 1972                 Turkey  3450.6964
## 342 1977                 Turkey  4269.1223
## 343 1982                 Turkey  4241.3563
## 344 1987                 Turkey  5089.0437
## 345 1992                 Turkey  5678.3483
## 346 1997                 Turkey  6601.4299
## 347 2002                 Turkey  6508.0857
## 348 2007                 Turkey  8458.2764
## 349 1952         United Kingdom  9979.5085
## 350 1957         United Kingdom 11283.1779
## 351 1962         United Kingdom 12477.1771
## 352 1967         United Kingdom 14142.8509
## 353 1972         United Kingdom 15895.1164
## 354 1977         United Kingdom 17428.7485
## 355 1982         United Kingdom 18232.4245
## 356 1987         United Kingdom 21664.7877
## 357 1992         United Kingdom 22705.0925
## 358 1997         United Kingdom 26074.5314
## 359 2002         United Kingdom 29478.9992
## 360 2007         United Kingdom 33203.2613
# One row per country: mean GDP per capita plus the value of the se() helper
# applied to the same column.
mean_gpd_percountry <- gapminder %>%
  group_by(country) %>%
  summarise(
    mean_gdp = mean(gdpPercap),
    se_gdp = se(gdpPercap)
  )

mean_gpd_percountry
## # A tibble: 142 x 3
##        country   mean_gdp     se_gdp
##         <fctr>      <dbl>      <dbl>
##  1 Afghanistan   802.6746   31.23550
##  2     Albania  3255.3666  344.20223
##  3     Algeria  4426.0260  378.26190
##  4      Angola  3607.1005  336.56641
##  5   Argentina  8955.5538  537.68144
##  6   Australia 19980.5956 2256.11315
##  7     Austria 20411.9163 2787.23968
##  8     Bahrain 18077.6639 1563.29518
##  9  Bangladesh   817.5588   67.86165
## 10     Belgium 19900.7581 2422.32683
## # ... with 132 more rows

Challenge: I want the mean, se, and sample size of life expectancy by continent

# One row per continent: mean life expectancy, se() of life expectancy, and
# the number of rows in the group.
mean_se_sample <- gapminder %>%
  group_by(continent) %>%
  summarise(
    mean_lifeExp = mean(lifeExp),
    se_lifeExp = se(lifeExp),
    samsize_lifeExp = n()
  )

mean_se_sample
## # A tibble: 5 x 4
##   continent mean_lifeExp se_lifeExp samsize_lifeExp
##      <fctr>        <dbl>      <dbl>           <int>
## 1    Africa     48.86533  0.3663016             624
## 2  Americas     64.65874  0.5395389             300
## 3      Asia     60.06490  0.5962151             396
## 4    Europe     71.90369  0.2863536             360
## 5   Oceania     74.32621  0.7747759              24

By continent and country

# Same summary as above, but computed for every continent/country pair.
# This overwrites the per-continent table stored in mean_se_sample.
mean_se_sample <- gapminder %>%
  group_by(continent, country) %>%
  summarise(
    mean_lifeExp = mean(lifeExp),
    se_lifeExp = se(lifeExp),
    samsize_lifeExp = n()
  )

mean_se_sample
## # A tibble: 142 x 5
## # Groups:   continent [?]
##    continent                  country mean_lifeExp se_lifeExp
##       <fctr>                   <fctr>        <dbl>      <dbl>
##  1    Africa                  Algeria     59.03017  2.9849208
##  2    Africa                   Angola     37.88350  1.1562236
##  3    Africa                    Benin     48.77992  1.7691977
##  4    Africa                 Botswana     54.59750  1.7116922
##  5    Africa             Burkina Faso     44.69400  1.9762099
##  6    Africa                  Burundi     44.81733  0.9165096
##  7    Africa                 Cameroon     48.12850  1.5784640
##  8    Africa Central African Republic     43.86692  1.3627459
##  9    Africa                     Chad     46.77358  1.4110376
## 10    Africa                  Comoros     52.38175  2.3476081
## # ... with 132 more rows, and 1 more variables: samsize_lifeExp <int>

Combining ggplot and dplyr

# Life expectancy over time for the European rows of gapminder, drawn as one
# coloured line per country with a separate facet for each country.
az_countries <- gapminder %>%
  filter(continent == "Europe") %>%
  ggplot(aes(x = year, y = lifeExp, color = country)) +
  geom_line() +
  facet_wrap(~country)

az_countries

# Pass the plot explicitly so the saved image does not depend on whichever
# plot happened to be displayed last (ggsave() defaults to last_plot()).
ggsave("euro_az_countries.png", plot = az_countries)
## Saving 7 x 5 in image
# Save the continent/country summary table to disk.
write.csv(mean_se_sample, file = "data/mean_se_sample.csv")

Data manipulation with tidyr

R likes to have ‘long’ format data, where every row is an observation and you have a single column for ‘observations’; the other columns serve to identify that observation (exceptions apply when you have multiple types of observations). To switch back and forth between ‘wide’ (how we typically enter data in a spreadsheet) and ‘long’, use tidyr.

# Download the wide-format gapminder data.
download.file(
  "https://raw.githubusercontent.com/swcarpentry/r-novice-gapminder/gh-pages/data/gapminder_wide.csv",
  destfile = "data/gapminder_wide.csv"
)

gapminder_wide <- read.csv(file = "data/gapminder_wide.csv")

# Stack every column whose name starts with pop, lifeExp or gdpPercap into
# key/value pairs: obstype_year holds the former column name and obs_values
# holds the cell value.
gap_long <- gather(
  gapminder_wide,
  key = obstype_year, value = obs_values,
  starts_with("pop"),
  starts_with("lifeExp"),
  starts_with("gdpPercap")
)

head(gap_long)
##   continent      country obstype_year obs_values
## 1    Africa      Algeria     pop_1952    9279525
## 2    Africa       Angola     pop_1952    4232095
## 3    Africa        Benin     pop_1952    1738315
## 4    Africa     Botswana     pop_1952     442308
## 5    Africa Burkina Faso     pop_1952    4469979
## 6    Africa      Burundi     pop_1952    2445618

Separate the obstype_year column

# Split keys such as "pop_1952" at the underscore into obs_type and obs_year,
# then spread the observation types back out into one column each.
gap_normal <- gap_long %>%
  separate(col = obstype_year, into = c("obs_type", "obs_year"), sep = "_") %>%
  spread(key = obs_type, value = obs_values)

head(gap_normal)
##   continent country obs_year gdpPercap lifeExp      pop
## 1    Africa Algeria     1952  2449.008  43.077  9279525
## 2    Africa Algeria     1957  3013.976  45.685 10270856
## 3    Africa Algeria     1962  2550.817  48.303 11000948
## 4    Africa Algeria     1967  3246.992  51.407 12760499
## 5    Africa Algeria     1972  4182.664  54.518 14760787
## 6    Africa Algeria     1977  4910.417  58.014 17152804
# Disabled: sort the reshaped data and compare it with the original gapminder
# data frame.
# NOTE(review): gap_normal has an `obs_year` column rather than `year`, so the
# arrange() call below would need updating before this is re-enabled.
#gap_normal <- gap_normal %>%
#  arrange(country,continent,year)
#all.equal(gapminder, gap_normal)
head(gap_normal)
##   continent country obs_year gdpPercap lifeExp      pop
## 1    Africa Algeria     1952  2449.008  43.077  9279525
## 2    Africa Algeria     1957  3013.976  45.685 10270856
## 3    Africa Algeria     1962  2550.817  48.303 11000948
## 4    Africa Algeria     1967  3246.992  51.407 12760499
## 5    Africa Algeria     1972  4182.664  54.518 14760787
## 6    Africa Algeria     1977  4910.417  58.014 17152804